home *** CD-ROM | disk | FTP | other *** search
- {*********************************************************}
- {* AAExtMge *}
- {* Copyright (c) Julian M Bucknall 2000 *}
- {* All rights reserved. *}
- {*********************************************************}
- {* Algorithms Alfresco: External Mergesort *}
- {*********************************************************}
-
- {Note: this unit is released as freeware. In other words, you are free
- to use this unit in your own applications, however I retain all
- copyright to the code. JMB}
-
- unit AAExtMge;
-
- interface
-
- uses
- Windows,
- SysUtils;
-
- type
- {function prototype to compare two items;
- returns integer <0 if Item1<Item2, =0 if equal, >0 otherwise}
- TaaMergeCompare = function (const aItem1, aItem2 : pointer) : integer;
-
-
- procedure aaMergesortFixed(const aInFile : string;
- const aOutFile : string;
- aRecLen : integer;
- aCompare : TaaMergeCompare);
- {-mergesorts the input file to produce the output file; the input
- file is assumed to contain fixed length records of size aRecLen}
-
- implementation
-
- uses
- Classes;
-
- {===TaaTempFileStream================================================}
- type
- TaaTempFileStream = class(TFileStream)
- private
- FFileName : string;
- FDelete : boolean;
- protected
- public
- constructor Create(const aPath : string; aMode : word);
- destructor Destroy; override;
-
- property DeleteOnDestroy : boolean read FDelete write FDelete;
- property FileName : string read FFileName;
- end;
- {--------}
- constructor TaaTempFileStream.Create(const aPath : string; aMode : word);
- var
- PathNameZ : array [0..MAX_PATH] of char;
- FileNameZ : array [0..MAX_PATH] of char;
- begin
- {get the path for temporary files}
- if (aPath = '') then
- GetTempPath(sizeof(PathNameZ), PathNameZ)
- else
- StrLCopy(PathNameZ, PChar(aPath), sizeof(PathNameZ));
- {create a temporary file}
- GetTempFileName(PathNameZ, 'AA', 0, FileNameZ);
- FFileName := FileNameZ;
- {this last step will have created the file, so open it}
- inherited Create(FileName, aMode);
- end;
- {--------}
- destructor TaaTempFileStream.Destroy;
- begin
- {close the file}
- inherited Destroy;
- {if we're asked to delete the file, do so}
- if DeleteOnDestroy then
- DeleteFile(FileName);
- end;
- {====================================================================}
-
-
- {====================================================================}
- function ReadRecFixed(aStream : TStream;
- var aBuffer;
- aRecLen : integer) : boolean;
- var
- BytesRead : longint;
- begin
- BytesRead := aStream.Read(aBuffer, aRecLen);
- Result := BytesRead = aRecLen;
- end;
- {--------}
- procedure SelectionSort(aBlock : pointer;
- aRecCount : integer;
- aRecLen : integer;
- aCompare : TaaMergeCompare);
- var
- i, j : integer;
- TempRec : pointer;
- iPtr : PChar;
- jPtr : PChar;
- MinPtr : PChar;
- begin
- GetMem(TempRec, aRecLen);
- try
- iPtr := aBlock;
- for i := 0 to (aRecCount - 2) do begin
- MinPtr := iPtr;
- jPtr := iPtr;
- for j := succ(i) to pred(aRecCount) do begin
- inc(jPtr, aRecLen);
- if (aCompare(jPtr, MinPtr) < 0) then
- MinPtr := jPtr;
- end;
- Move(iPtr^, TempRec^, aRecLen);
- Move(MinPtr^, iPtr^, aRecLen);
- Move(TempRec^, MinPtr^, aRecLen);
- inc(iPtr, aRecLen);
- end;
- finally
- FreeMem(TempRec);
- end;
- end;
- {--------}
- procedure SplitFileFixed(aInFile : TStream;
- aF1 : TStream;
- aF2 : TStream;
- aRecLen : integer;
- aCompare: TaaMergeCompare);
- const
- FirstFile = false;
- SecondFile = true;
- var
- Rec1 : pointer;
- Rec2 : pointer;
- F : array [boolean] of TStream;
- Have1st : boolean;
- Have2nd : boolean;
- Use1st1st : boolean;
- DestFile : boolean;
- begin
- F[FirstFile] := aF1;
- F[SecondFile] := aF2;
- Rec1 := nil;
- Rec2 := nil;
- try
- {allocate the record buffers}
- GetMem(Rec1, aRecLen);
- GetMem(Rec2, aRecLen);
- {we start out with the first output file}
- DestFile := FirstFile;
- {read the first two records}
- Have1st := ReadRecFixed(aInFile, Rec1^, aRecLen);
- Have2nd := ReadRecFixed(aInFile, Rec2^, aRecLen);
- {in a loop read the records in pairs and write them in sequence to
- the output file, alternating between output files; the loop stops
- when we can't read any more records}
- while Have1st do begin
- {order the two records}
- if Have2nd then
- Use1st1st := aCompare(Rec1, Rec2) <= 0
- else
- Use1st1st := true;
- {write them out in order to the current output file}
- if Use1st1st then begin
- F[DestFile].WriteBuffer(Rec1^, aRecLen);
- if Have2nd then
- F[DestFile].WriteBuffer(Rec2^, aRecLen);
- end
- else begin
- F[DestFile].WriteBuffer(Rec2^, aRecLen);
- F[DestFile].WriteBuffer(Rec1^, aRecLen);
- end;
- {switch output files}
- DestFile := not DestFile;
- {read the next two records}
- Have1st := ReadRecFixed(aInFile, Rec1^, aRecLen);
- Have2nd := ReadRecFixed(aInFile, Rec2^, aRecLen);
- end;
- finally
- if (Rec2 <> nil) then
- FreeMem(Rec2);
- if (Rec1 <> nil) then
- FreeMem(Rec1);
- end;
- end;
- {--------}
- function SplitFileFixedBlock(aInFile : TStream;
- aF1 : TStream;
- aF2 : TStream;
- aRecLen : integer;
- aCompare: TaaMergeCompare) : integer;
- const
- FirstFile = false;
- SecondFile = true;
- var
- Block : pointer;
- F : array [boolean] of TStream;
- DestFile : boolean;
- BlockSize : integer;
- BytesRead : longint;
- RecCount : integer;
- begin
- F[FirstFile] := aF1;
- F[SecondFile] := aF2;
- Block := nil;
- try
- {allocate the block buffer}
- Result := (128 * 1024) div aRecLen;
- BlockSize := Result * aRecLen;
- GetMem(Block, BlockSize);
- {we start out with the first output file}
- DestFile := FirstFile;
- {read the first block}
- BytesRead := aInFile.Read(Block^, BlockSize);
- RecCount := BytesRead div aRecLen;
- {in a loop sort the block and write it to the output file,
- alternating between output files; the loop stops when we can't
- read any more blocks}
- while (RecCount <> 0) do begin
- {sort the block}
- SelectionSort(Block, RecCount, aRecLen, aCompare);
- {write out the sorted block to the current output file}
- F[DestFile].WriteBuffer(Block^, BytesRead);
- {switch output files}
- DestFile := not DestFile;
- {read the next block}
- BytesRead := aInFile.Read(Block^, BlockSize);
- RecCount := BytesRead div aRecLen;
- end;
- finally
- if (Block <> nil) then
- FreeMem(Block);
- end;
- end;
- {--------}
- function MergeRunsFixed(aF1 : TStream;
- aF2 : TStream;
- aG1 : TStream;
- aG2 : TStream;
- aRecLen : integer;
- aRunLen : integer;
- aCompare: TaaMergeCompare) : boolean;
- const
- FirstFile = false;
- SecondFile = true;
- type
- {a record that describes the processing of a single input file}
- TInputFile = packed record
- ifStrm : TStream; {stream}
- ifRec : pointer; {record buffer}
- ifRecsInRun : integer; {records to go in run}
- ifEOF : boolean; {stream is exhausted}
- end;
- var
- F : array[boolean] of TInputFile;
- G : array [boolean] of TStream;
- SrcFile : boolean;
- DestFile : boolean;
- FileId : boolean;
- begin
- {assume that this merge pass will finish completely}
- Result := true;
- {initialize the input file records}
- with F[FirstFile] do begin
- ifStrm := aF1;
- ifRec := nil;
- ifRecsInRun := 0;
- ifEOF := false;
- end;
- with F[SecondFile] do begin
- ifStrm := aF2;
- ifRec := nil;
- ifRecsInRun := 0;
- ifEOF := false;
- end;
- {set up the output files}
- G[FirstFile] := aG1;
- G[SecondFile] := aG2;
- try
- {clear the output streams}
- {NOTE: this only works for Delphi 3 and above, since only
- their TStreams have a SetSize accessor method}
- G[FirstFile].Size := 0;
- G[SecondFile].Size := 0;
- {reset the input streams, allocate the record buffers,
- and set the EOF flags}
- for FileId := FirstFile to SecondFile do
- with F[FileId] do begin
- ifStrm.Seek(0, soFromBeginning);
- GetMem(ifRec, aRecLen);
- ifEOF := ifStrm.Size = 0;
- end;
- {make sure the first output goes to G1}
- DestFile := FirstFile;
- {cycle until we manage to exhaust both input files}
- while (not F[FirstFile].ifEOF) or
- (not F[SecondFile].ifEOF) do begin
- {if we start writing to the second file, we won't finish
- the merge process this time}
- if (DestFile = SecondFile) then
- Result := false;
- {initialize ready for merging next runs}
- F[FirstFile].ifRecsInRun := aRunLen;
- F[SecondFile].ifRecsInRun := aRunLen;
- {read the first two records in the respective runs}
- with F[FirstFile] do
- if ReadRecFixed(ifStrm, ifRec^, aRecLen) then
- dec(ifRecsInRun)
- else begin
- ifRecsInRun := -1;
- ifEOF := true;
- end;
- with F[SecondFile] do
- if ReadRecFixed(ifStrm, ifRec^, aRecLen) then
- dec(ifRecsInRun)
- else begin
- ifRecsInRun := -1;
- ifEOF := true;
- end;
- {merge the two runs--one from F1 and the other from F2}
- while ((F[FirstFile].ifRecsInRun >= 0) or
- (F[SecondFile].ifRecsInRun >= 0)) do begin
- {find the smaller record of the two current ones}
- {if the run from F1 is exhausted then the record from
- F2 is the 'smaller'}
- if (F[FirstFile].ifRecsInRun < 0) then
- SrcFile := SecondFile
- {if the run from F2 is exhausted then the record from
- F1 is the 'smaller'}
- else if (F[SecondFile].ifRecsInRun < 0) then
- SrcFile := FirstFile
- {otherwise we need to actually compare the records to
- find the smaller}
- else
- SrcFile :=
- aCompare(F[FirstFile].ifRec, F[SecondFile].ifRec) > 0;
- {write the smaller record to the current output file}
- G[DestFile].WriteBuffer(F[SrcFile].ifRec^, aRecLen);
- {read the next record from the file whose record we just used}
- with F[SrcFile] do
- if (ifRecsInRun <= 0) then
- ifRecsInRun := -1
- else if ReadRecFixed(ifStrm, ifRec^, aRecLen) then
- dec(ifRecsInRun)
- else begin
- ifRecsInRun := -1;
- ifEOF := true;
- end
- end;
- {having merged two runs, switch output files}
- DestFile := not DestFile;
- end;
- finally
- if (F[SecondFile].ifRec <> nil) then
- FreeMem(F[SecondFile].ifRec);
- if (F[FirstFile].ifRec <> nil) then
- FreeMem(F[FirstFile].ifRec);
- end;
- end;
- {--------}
- procedure aaMergesortFixed(const aInFile : string;
- const aOutFile : string;
- aRecLen : integer;
- aCompare : TaaMergeCompare);
- var
- InFile : TFileStream;
- F : array [1..2] of TaaTempFileStream;
- G : array [1..2] of TaaTempFileStream;
- Merged : boolean;
- FIsSrc : boolean;
- RunLen : integer;
- Path : string;
- MergedFileName : string;
- begin
- Assert(aInFile <> '', 'Input file name cannot be blank');
- Assert(aOutFile <> '', 'Output file name cannot be blank');
- Assert(aRecLen > 0, 'Record length must be a positive number');
- Assert(Assigned(aCompare), 'Compare function must be set');
- InFile := nil;
- F[1] := nil;
- F[2] := nil;
- G[1] := nil;
- G[2] := nil;
- try
- {open the file to be sorted}
- InFile := TFileStream.Create(aInFile, fmOpenRead+fmShareDenyWrite);
- {split the file into the first two file components}
- Path := ExtractFilePath(aOutFile);
- {split the input file into two files containing runs of length 2}
- F[1] := TaaTempFileStream.Create(Path, fmOpenReadWrite);
- F[2] := TaaTempFileStream.Create(Path, fmOpenReadWrite);
- // SplitFileFixed(InFile, F[1], F[2], aRecLen, aCompare);
- // RunLen := 2;
- RunLen := SplitFileFixedBlock(InFile, F[1], F[2], aRecLen, aCompare);
- {perform the first merge pass}
- G[1] := TaaTempFileStream.Create(Path, fmOpenReadWrite);
- G[2] := TaaTempFileStream.Create(Path, fmOpenReadWrite);
- Merged := MergeRunsFixed(F[1], F[2], G[1], G[2],
- aRecLen, RunLen, aCompare);
- {now we continually merge the runs until we end up with a single
- file containing all the records}
- FIsSrc := true;
- while not Merged do begin
- RunLen := RunLen * 2;
- FIsSrc := not FIsSrc;
- if FIsSrc then
- Merged := MergeRunsFixed(F[1], F[2], G[1], G[2],
- aRecLen, RunLen, aCompare)
- else
- Merged := MergeRunsFixed(G[1], G[2], F[1], F[2],
- aRecLen, RunLen, aCompare);
- end;
- {we've now merged all the records into either F1 or G1; rename the
- file containing all the records to the output file name, and
- then delete the other three temporaries}
- if FIsSrc then begin
- MergedFileName := G[1].FileName;
- F[1].DeleteOnDestroy := true;
- end
- else begin
- MergedFileName := F[1].FileName;
- G[1].DeleteOnDestroy := true;
- end;
- F[2].DeleteOnDestroy := true;
- G[2].DeleteOnDestroy := true;
- finally
- G[2].Free;
- G[1].Free;
- F[2].Free;
- F[1].Free;
- InFile.Free;
- end;
- RenameFile(MergedFileName, aOutFile);
- end;
-
- end.
-
-